TODO

other domains:

  • AUD to be added
  • schizophrenia/psychosis: not enough self report symptom scales when Tim looked into it.
  • BPD?

Dependencies

library(dplyr)
library(readr)
library(tibble)
library(forcats)
library(proxy)
library(janitor)
library(metafor)
library(ggplot2)
library(ggstance)
library(scales)
library(knitr)
library(kableExtra)

Data

data_depression <- read_csv("../data/depression - Fried 2017/MatrixB.csv")     #Load dat for estimating Jaccard index (no difference between specific and compound symptoms)

data_anxiety <- read_csv("../data/anxiety - Wall & Lee 2022/anxiety_wall_lee_jaccard.csv")

data_psychosis <- read_csv("../data/psychosis - Bernardin et al. 2023/psychosis - Bernardin et al. 2023.csv") |> 
  janitor::clean_names() %>%
  mutate(across(everything(), ~ ifelse(. == 2, 1, .)))

data_hypomania <- read_csv("../data/hypomania - Chrobak et al. 2018/hypomania.csv") 

data_eatingdisorders <- read_csv("../data/eating disorders - Christensen Pacella et al. 2024/MatrixB_EDMeasures_5-26-2023.csv")

data_youth_ocd <- read_csv("../data/ocd - Visiontay et al. 2019/youth_ocd.csv") |> 
  janitor::clean_names() %>%
  mutate(across(everything(), ~ ifelse(. == 2, 1, .)))

# youth depression

# youth anxiety

Jaccard index interpretation

Real, R., & Vargas, J. M. (1996). The probabilistic basis of Jaccard’s index of similarity. Systematic Biology, 45(3), 380-385. DOI:10.1093/sysbio/45.3.380

  • Jaccard < 0.25: Typically considered low similarity, meaning that the two sets have few elements in common.
  • 0.25 ≤ Jaccard < 0.5: Moderate similarity, which indicates that the sets share some commonalities but still have many differences.
  • 0.5 ≤ Jaccard < 0.75: High similarity, meaning there is substantial overlap between the sets.
  • Jaccard ≥ 0.75: Very high similarity, with most elements being shared between the sets.

Mean Jaccard index by domain

cor_estimates <- function(.data){
  data_mat_t <- .data |>
    as.matrix() |> 
    t()
  
  jaccard_diff <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
  jaccard_sim <- 1 - jaccard_diff
  
  jaccard_sim[!lower.tri(jaccard_sim)] <- NA
  
  mean_jaccard <- mean(jaccard_sim, na.rm = TRUE)
  se_jaccard <- plotrix::std.error(as.vector(jaccard_sim), na.rm = TRUE)
  
  return(data.frame(J_mean = mean_jaccard,
                    J_se = se_jaccard,
                    J_ci_lower = mean_jaccard - se_jaccard*1.96,
                    J_ci_upper = mean_jaccard + se_jaccard*1.96))
}

dat <- bind_rows(
  data_depression |>
    cor_estimates() |>
    mutate(domain = "Depression"),
  data_anxiety |>
    cor_estimates() |>
    mutate(domain = "Anxiety"),
  data_psychosis |>
    cor_estimates() |>
    mutate(domain = "Psychosis"),
  data_hypomania |>
    cor_estimates() |>
    mutate(domain = "(Hypo)mania"),
  data_youth_ocd |>
    cor_estimates() |>
    mutate(domain = "(Youth) OCD"),
  data_eatingdisorders |>
    cor_estimates() |>
    mutate(domain = "Eating Disorders")
) |>
  mutate(logit_J_mean = qlogis(J_mean),
         # Calculate SEs on the logit scale using the delta method
         # SE_logit_p = SE_p / [p * (1 - p)]
         logit_J_se = J_se / (J_mean * (1 - J_mean))) |>
  relocate(domain, .before = J_mean)

dat |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J_mean J_se J_ci_lower J_ci_upper logit_J_mean logit_J_se
Depression 0.41 0.02 0.37 0.46 -0.35 0.10
Anxiety 0.26 0.01 0.23 0.29 -1.06 0.07
Psychosis 0.19 0.01 0.17 0.22 -1.42 0.08
(Hypo)mania 0.41 0.03 0.36 0.46 -0.36 0.11
(Youth) OCD 0.38 0.02 0.34 0.42 -0.49 0.10
Eating Disorders 0.22 0.02 0.18 0.26 -1.29 0.12

Meta across domains

# fit meta
fit <- rma(yi = logit_J_mean, 
           vi = logit_J_se^2,
           data = dat,
           method = "REML")

# create a forest plot
plot_forest <- 
  metafor::forest(fit, 
                  transf = plogis,
                  slab = domain,
                  xlim = c(-0.8, 1.7),
                  at = c(0, .2, .4, .6, .8, 1),
                  efac = 1,
                  addpred = TRUE,
                  xlab = "Jaccard Similarity",
                  header = c("Domain", "J [95% CI]"),
                  refline = NA)

# table of results
results_predictions <- 
  bind_cols(
    fit |>
      predict(transf = plogis, level = 50) |>
      as_tibble() |>
      dplyr::select(meta_J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
    fit |>
      predict(transf = plogis, level = 95) |>
      as_tibble() |>
      dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
  ) |>
  mutate_all(janitor::round_half_up, digits = 2)

results_predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
meta_J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
0.3 0.28 0.33 0.24 0.38 0.14 0.55

3-level meta-analyses by domain

escalc_jaccard <- function(.data, domain = NA) {
  data_mat_t <- .data |> 
    as.matrix() |> 
    t()

  jaccard_difference <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
  jaccard_similarity <- 1 - jaccard_difference

  # Calculate SEs for each Jaccard similarity
  var_names <- rownames(data_mat_t)
  n_vars <- length(var_names)

  # Initialize the SE matrix
  se_matrix <- matrix(NA, nrow = n_vars, ncol = n_vars)
  rownames(se_matrix) <- var_names
  colnames(se_matrix) <- var_names

  # Compute SEs
  for (i in 1:n_vars) {
    for (j in 1:n_vars) {
      x <- data_mat_t[i, ]
      y <- data_mat_t[j, ]
      a <- sum(x == 1 & y == 1)
      b <- sum(x == 1 & y == 0)
      c <- sum(x == 0 & y == 1)
      n <- a + b + c
      J <- a / n
      SE <- sqrt(J * (1 - J) / n)
      se_matrix[i, j] <- SE
    }
  }

  # Dynamic epsilon for zero-and-one adjustment
  #epsilon <- 1 / n_vars  # Proportional to the number of variables
  epsilon <- 0.5 # common default, greater pull towards center and less influence of 0,1 values
  
  # Apply zero-and-one adjustment
  adjusted_jaccard_similarity <- (jaccard_similarity * (n_vars - 1) + epsilon) / n_vars

  # Logit transform
  logit_jaccard_similarity <- qlogis(adjusted_jaccard_similarity)

  # Calculate SEs on the logit scale using the delta method
  # SE_logit_p = SE_p / [p * (1 - p)]
  se_logit_jaccard_similarity <- se_matrix / (adjusted_jaccard_similarity * (1 - adjusted_jaccard_similarity))

  # Replace zero SEs with a principled lower bound
  min_se <- sqrt(epsilon * (1 - epsilon) / n_vars)
  se_logit_jaccard_similarity[se_logit_jaccard_similarity == 0] <- min_se

  # Replace infinite values resulting from division by zero
  se_logit_jaccard_similarity[!is.finite(se_logit_jaccard_similarity)] <- NA

  # Create variable pair names
  m <- adjusted_jaccard_similarity
  ## Get indices and values
  indices <- which(lower.tri(m), arr.ind = TRUE)
  vec <- m[lower.tri(m)]
  ## Create variable pair names
  variable_pairs <- paste(rownames(m)[indices[, 1]], colnames(m)[indices[, 2]], sep = " - ")

  # Create a data frame for effect sizes
  dat_es <- tibble(
    domain = domain,
    J = jaccard_similarity[lower.tri(jaccard_similarity)],
    yi = logit_jaccard_similarity[lower.tri(logit_jaccard_similarity)],
    sei = se_logit_jaccard_similarity[lower.tri(se_logit_jaccard_similarity)],
    scale1 = rownames(m)[indices[, 1]],
    scale2 = colnames(m)[indices[, 2]],
    pair = variable_pairs
  )

  return(dat_es)
}


rma_jaccard_three_level <- function(effect_sizes, title = "Scales"){
  
  # fit a three-level meta-analysis model accounting for shared scales
  fit <- rma.mv(yi = yi, 
                V = sei^2,
                random = list(~ 1 | scale1, ~ 1 | scale2),
                data = effect_sizes,
                method = "REML")
  
  # create a forest plot
  plot_forest <- 
    metafor::forest(fit, 
                    transf = plogis,
                    slab = effect_sizes$pair,
                    xlim = c(-0.8, 1.7),
                    at = c(0, .2, .4, .6, .8, 1),
                    efac = 0.2,
                    addpred = TRUE,
                    xlab = "Jaccard Similarity",
                    header = c(title, "J [95% CI]"),
                    refline = NA)
  
  # table of results
  results_predictions <- 
    bind_cols(
      fit |>
        predict(transf = plogis, level = 50) |>
        as_tibble() |>
        dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
      fit |>
        predict(transf = plogis, level = 95) |>
        as_tibble() |>
        dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
    ) |>
    mutate(domain = title) |>
    relocate(domain, .before = J)
  
  return(list(fit = fit,
              predictions = results_predictions,
              forest = plot_forest))
}

Depression

es_depression <- data_depression |>
  escalc_jaccard(domain = "Depression") 

res_depression <- es_depression |>
  rma_jaccard_three_level(title = "Depression scales")

res_depression$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Depression scales 0.44 0.41 0.46 0.38 0.5 0.29 0.61

Anxiety

es_anxiety <- data_anxiety |>
  escalc_jaccard(domain = "Anxiety") 

res_anxiety <- es_anxiety |>
  rma_jaccard_three_level(title = "Anxiety scales")

res_anxiety$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Anxiety scales 0.27 0.24 0.29 0.19 0.36 0.1 0.55

Psychosis

es_psychosis <- data_psychosis |>
  escalc_jaccard(domain = "Psychosis")

res_psychosis <- es_psychosis |>
  rma_jaccard_three_level(title = "Psychosis scales")

res_psychosis$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Psychosis scales 0.21 0.19 0.24 0.14 0.31 0.06 0.53

Hypomania

es_hypomania <- data_hypomania |>
  escalc_jaccard(domain = "(Hypo)mania")

res_hypomania <- es_hypomania |>
  rma_jaccard_three_level(title = "(Hypo)mania scales")

res_hypomania$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
(Hypo)mania scales 0.42 0.39 0.44 0.35 0.48 0.25 0.61

OCD (youth)

es_youth_ocd <- data_youth_ocd |>
  escalc_jaccard(domain = "Youth OCD")

res_youth_ocd <- es_youth_ocd |>
  rma_jaccard_three_level(title = "Youth OCD scales")

res_youth_ocd$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Youth OCD scales 0.39 0.37 0.41 0.37 0.41 0.33 0.45

Eating disorders

es_eatingdisorders <- data_eatingdisorders |>
  escalc_jaccard(domain = "OCD")

res_eatingdisorders <- es_eatingdisorders |>
  rma_jaccard_three_level(title = "Eating disorders scales")

res_eatingdisorders$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Eating disorders scales 0.25 0.23 0.27 0.22 0.29 0.16 0.36

4-level meta-analyses across domains

es_combined <- bind_rows(
  es_depression,
  es_anxiety,
  es_psychosis,
  es_hypomania,
  es_eatingdisorders,
  es_youth_ocd
)

rma_jaccard_four_level <- function(effect_sizes){
  
  # Fit a four-level meta-analysis model accounting for shared scales
  fit <- rma.mv(yi = yi,
                V = sei^2,
                random = list(~ 1 | domain/scale1, ~ 1 | domain/scale2),
                data = effect_sizes,
                method = "REML")
  
  # Create a forest plot
  plot_forest <- 
    meta::forest(fit, 
           transf = plogis,
           slab = effect_sizes$pair,
           xlim = c(-0.8, 1.7),
           at = c(0, .2, .4, .6, .8, 1),
           #cex = 0.4,
           efac = 0.2,
           addpred = TRUE,
           xlab = "Jaccard Similarity",
           header = c("Scales", "J [95% CI]"),
           refline = NA)
  
  results_predictions <- 
    bind_cols(
      fit |>
        predict(transf = plogis, level = 50) |>
        as_tibble() |>
        dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
      fit |>
        predict(transf = plogis, level = 95) |>
        as_tibble() |>
        dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
    ) |>
    mutate(domain = "4-level RE") |>
    relocate(domain, .before = J)
  
  return(list(fit = fit,
              predictions = results_predictions,
              forest = plot_forest))
}

res_combined <- rma_jaccard_four_level(es_combined)

res_combined$predictions |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
4-level RE 0.31 0.29 0.34 0.22 0.42 0.1 0.64
#res_combined$fit
 
# Combine estimates and labels into a data frame for easy viewing
tibble(label = c("domain", "domain/scale1", "domain again", "domain/scale2")) |>
  mutate(logit_sigma = res_combined$fit$sigma2,
         sigma = round_half_up(plogis(logit_sigma), 2)) |>
  filter(label != "domain again") |>
  select(-logit_sigma) |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
label sigma
domain 0.52
domain/scale1 0.54
domain/scale2 0.53

Comparions between domains

3- and 4-level meta

predictions_combined_by_domain <- bind_rows(
  res_depression$predictions,
  res_anxiety$predictions,
  res_psychosis$predictions,
  res_hypomania$predictions,
  res_youth_ocd$predictions,
  res_eatingdisorders$predictions,
  res_combined$predictions
) |>
  mutate(domain = fct_rev(fct_relevel(domain,
                                      "Depression scales",
                                      "Anxiety scales",
                                      "Psychosis scales",
                                      "(Hypo)mania scales",
                                      "Youth OCD scales",
                                      "Eating disorders scales",
                                      "4-level RE")))

predictions_combined_by_domain |>
  mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower_50 pi_upper_50 pi_lower_95 pi_upper_95
Depression scales 0.44 0.41 0.46 0.38 0.50 0.29 0.61
Anxiety scales 0.27 0.24 0.29 0.19 0.36 0.10 0.55
Psychosis scales 0.21 0.19 0.24 0.14 0.31 0.06 0.53
(Hypo)mania scales 0.42 0.39 0.44 0.35 0.48 0.25 0.61
Youth OCD scales 0.39 0.37 0.41 0.37 0.41 0.33 0.45
Eating disorders scales 0.25 0.23 0.27 0.22 0.29 0.16 0.36
4-level RE 0.31 0.29 0.34 0.22 0.42 0.10 0.64
ggplot(predictions_combined_by_domain, aes(J, domain)) +
  # geom_errorbarh(aes(xmin = pi_lower, xmax = pi_upper), linetype = "dotted", height = 0.26) +
  # geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", height = 0.26, size = 0.85) +
  geom_linerange(aes(xmin = pi_lower_95, xmax = pi_upper_95), linetype = "solid", linewidth = 0.3) +
  geom_linerange(aes(xmin = pi_lower_50, xmax = pi_upper_50), linetype = "solid", linewidth = 0.6, color = "red") +
  geom_linerange(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", linewidth = 1.2) +
  geom_point(size = 2.4, color = "red") +
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1), breaks = scales::breaks_pretty(n = 10), name = "Jaccard Similarity") +
  ylab("")

dir.create("plots")

ggsave("plots/plot.pdf",
       width = 6,
       height = 3)

Session info

sessionInfo()
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0    knitr_1.50          scales_1.4.0       
##  [4] ggstance_0.3.7      ggplot2_4.0.1       metafor_4.8-0      
##  [7] numDeriv_2016.8-1.1 metadat_1.4-0       Matrix_1.7-3       
## [10] janitor_2.2.1       proxy_0.4-29        forcats_1.0.1      
## [13] tibble_3.3.0        readr_2.1.5         dplyr_1.1.4        
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       xfun_0.54          bslib_0.9.0        CompQuadForm_1.4.4
##  [5] lattice_0.22-6     mathjaxr_1.8-0     tzdb_0.5.0         Rdpack_2.6.4      
##  [9] vctrs_0.6.5        tools_4.5.0        generics_0.1.4     parallel_4.5.0    
## [13] pkgconfig_2.0.3    RColorBrewer_1.1-3 S7_0.2.1           lifecycle_1.0.4   
## [17] compiler_4.5.0     farver_2.1.2       stringr_1.6.0      textshaping_1.0.3 
## [21] snakecase_0.11.1   htmltools_0.5.9    sass_0.4.10        yaml_2.3.12       
## [25] nloptr_2.2.1       pillar_1.11.1      crayon_1.5.3       jquerylib_0.1.4   
## [29] MASS_7.3-65        cachem_1.1.0       meta_8.2-1         reformulas_0.4.1  
## [33] boot_1.3-31        nlme_3.1-168       tidyselect_1.2.1   digest_0.6.39     
## [37] stringi_1.8.7      purrr_1.2.0        splines_4.5.0      fastmap_1.2.0     
## [41] grid_4.5.0         archive_1.1.12.1   cli_3.6.5          magrittr_2.0.4    
## [45] withr_3.0.2        plotrix_3.8-4      bit64_4.6.0-1      lubridate_1.9.4   
## [49] timechange_0.3.0   rmarkdown_2.30     bit_4.6.0          lme4_1.1-37       
## [53] ragg_1.4.0         hms_1.1.3          evaluate_1.0.5     rbibutils_2.3     
## [57] viridisLite_0.4.2  rlang_1.1.6        Rcpp_1.1.0         glue_1.8.0        
## [61] xml2_1.4.0         minqa_1.2.8        svglite_2.2.1      rstudioapi_0.17.1 
## [65] vroom_1.6.6        jsonlite_2.0.0     R6_2.6.1           systemfonts_1.2.3